In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.patches as mpatches
from matplotlib import ticker
from plotly.subplots import make_subplots
from wordcloud import WordCloud
import matplotlib.dates as md
import seaborn as sns
In [2]:
# Plot styling: a single call selecting the "whitegrid" style with color codes on.
sns.set(style="whitegrid", color_codes=True)

# Never truncate the column list when a frame is printed.
pd.set_option('display.max_columns', None)

# Load the vaccination records and preview the first rows.
df = pd.read_csv('country_vaccinations.csv')
print(df.head())
       country iso_code        date  total_vaccinations  people_vaccinated  \
0  Afghanistan      AFG  2021-02-22                 0.0                0.0   
1  Afghanistan      AFG  2021-02-23                 NaN                NaN   
2  Afghanistan      AFG  2021-02-24                 NaN                NaN   
3  Afghanistan      AFG  2021-02-25                 NaN                NaN   
4  Afghanistan      AFG  2021-02-26                 NaN                NaN   

   people_fully_vaccinated  daily_vaccinations_raw  daily_vaccinations  \
0                      NaN                     NaN                 NaN   
1                      NaN                     NaN              1367.0   
2                      NaN                     NaN              1367.0   
3                      NaN                     NaN              1367.0   
4                      NaN                     NaN              1367.0   

   total_vaccinations_per_hundred  people_vaccinated_per_hundred  \
0                             0.0                            0.0   
1                             NaN                            NaN   
2                             NaN                            NaN   
3                             NaN                            NaN   
4                             NaN                            NaN   

   people_fully_vaccinated_per_hundred  daily_vaccinations_per_million  \
0                                  NaN                             NaN   
1                                  NaN                            35.0   
2                                  NaN                            35.0   
3                                  NaN                            35.0   
4                                  NaN                            35.0   

                                            vaccines  \
0  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...   
1  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...   
2  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...   
3  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...   
4  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...   

                 source_name            source_website  
0  World Health Organization  https://covid19.who.int/  
1  World Health Organization  https://covid19.who.int/  
2  World Health Organization  https://covid19.who.int/  
3  World Health Organization  https://covid19.who.int/  
4  World Health Organization  https://covid19.who.int/  
In [3]:
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21628 entries, 0 to 21627
Data columns (total 15 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   country                              21628 non-null  object 
 1   iso_code                             21628 non-null  object 
 2   date                                 21628 non-null  object 
 3   total_vaccinations                   12275 non-null  float64
 4   people_vaccinated                    11479 non-null  float64
 5   people_fully_vaccinated              8967 non-null   float64
 6   daily_vaccinations_raw               10187 non-null  float64
 7   daily_vaccinations                   21412 non-null  float64
 8   total_vaccinations_per_hundred       12275 non-null  float64
 9   people_vaccinated_per_hundred        11479 non-null  float64
 10  people_fully_vaccinated_per_hundred  8967 non-null   float64
 11  daily_vaccinations_per_million       21412 non-null  float64
 12  vaccines                             21628 non-null  object 
 13  source_name                          21628 non-null  object 
 14  source_website                       21628 non-null  object 
dtypes: float64(9), object(6)
memory usage: 2.5+ MB
None
In [4]:
# Count the missing entries in every column.
missing_per_column = df.isna().sum()
print(missing_per_column)
country                                    0
iso_code                                   0
date                                       0
total_vaccinations                      9353
people_vaccinated                      10149
people_fully_vaccinated                12661
daily_vaccinations_raw                 11441
daily_vaccinations                       216
total_vaccinations_per_hundred          9353
people_vaccinated_per_hundred          10149
people_fully_vaccinated_per_hundred    12661
daily_vaccinations_per_million           216
vaccines                                   0
source_name                                0
source_website                             0
dtype: int64
In [5]:
# Pearson correlation between the numeric columns.
# The frame also holds string columns (country, iso_code, date, vaccines, ...);
# selecting the numeric ones explicitly keeps this working on pandas versions
# where DataFrame.corr no longer drops non-numeric columns silently.
numeric_df = df.select_dtypes(include='number')
corr = numeric_df.corr(method='pearson')

plt.figure(figsize=(20, 6))
sns.heatmap(corr, annot=True)
plt.show()
Out[5]:
<AxesSubplot:>
In [6]:
# Clean the frame used by every plot below.
#
# The original sequence filled all NaN with 0, then tried to fill iso_code NaN
# with 'GBR' (dead code: nothing was NaN any more) and finally dropped the rows
# whose iso_code had become 0. The net effect is "drop rows without an
# iso_code, zero-fill the rest", which is what this chain does directly,
# without chained in-place mutation.
df = (
    df.dropna(subset=['iso_code'])   # rows with no ISO code are discarded
      .fillna(0)                     # every remaining missing value becomes 0
      # errors='ignore' lets the cell be re-run after the columns are gone
      .drop(columns=["source_name", "source_website"], errors='ignore')
      # keep a contiguous 0..n-1 index even if rows were dropped above
      .reset_index(drop=True)
)
In [7]:
# Preview the first five rows of the cleaned frame.
print(df.iloc[:5])
       country iso_code        date  total_vaccinations  people_vaccinated  \
0  Afghanistan      AFG  2021-02-22                 0.0                0.0   
1  Afghanistan      AFG  2021-02-23                 0.0                0.0   
2  Afghanistan      AFG  2021-02-24                 0.0                0.0   
3  Afghanistan      AFG  2021-02-25                 0.0                0.0   
4  Afghanistan      AFG  2021-02-26                 0.0                0.0   

   people_fully_vaccinated  daily_vaccinations_raw  daily_vaccinations  \
0                      0.0                     0.0                 0.0   
1                      0.0                     0.0              1367.0   
2                      0.0                     0.0              1367.0   
3                      0.0                     0.0              1367.0   
4                      0.0                     0.0              1367.0   

   total_vaccinations_per_hundred  people_vaccinated_per_hundred  \
0                             0.0                            0.0   
1                             0.0                            0.0   
2                             0.0                            0.0   
3                             0.0                            0.0   
4                             0.0                            0.0   

   people_fully_vaccinated_per_hundred  daily_vaccinations_per_million  \
0                                  0.0                             0.0   
1                                  0.0                            35.0   
2                                  0.0                            35.0   
3                                  0.0                            35.0   
4                                  0.0                            35.0   

                                            vaccines  
0  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...  
1  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...  
2  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...  
3  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...  
4  Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...  
In [8]:
# Function to PlotMap
def plot_map(variable, title, color):
    """Show a world choropleth of ``variable`` using each country's last row.

    Reads the notebook-level ``df``. For every country the value of
    ``variable`` in that country's last row (in frame order) is plotted.

    Parameters
    ----------
    variable : str
        Name of the ``df`` column used to color the map.
    title : str
        Figure title, centered above the map.
    color : sequence or str or None
        Passed as ``color_continuous_scale`` to ``px.choropleth``; ``None``
        keeps plotly's default.

    Returns
    -------
    None
        The result of ``Figure.show()``; the figure is displayed as a side
        effect.
    """
    # One row per country: keep the last occurrence, restore first-appearance
    # order, and drop the index name so plotly receives the same unnamed index
    # the hand-built dict version produced. This replaces a Python loop over
    # every (country, row) pair that also relied on the index being 0..n-1.
    new_df = (
        df.drop_duplicates(subset='country', keep='last')
          .set_index('country', drop=False)[[variable]]
          .reindex(pd.unique(df['country']))
          .rename_axis(None)
    )

    choropleth_kwargs = dict(locations=new_df.index,
                             locationmode='country names',
                             color=variable,
                             hover_name=new_df.index)
    # Only override the color scale when the caller asked for one.
    if color is not None:
        choropleth_kwargs['color_continuous_scale'] = color

    place_map = px.choropleth(new_df, **choropleth_kwargs)

    place_map.update_layout(
        title_text=title,
        title_x=0.5,
        geo=dict(showocean=True, oceancolor="#7af8ff",
                 showland=True, landcolor="white",
                 showframe=False))

    return place_map.show()
In [9]:
#Function to PlotPie
def plot_pie(value, title, color):
    """Show a pie chart of how often each distinct entry of ``df[value]`` occurs.

    Reads the notebook-level ``df``.

    Parameters
    ----------
    value : str
        Name of the ``df`` column whose distinct values become pie slices.
    title : str
        Figure title.
    color : str or None
        ``'plasma'`` or ``'rainbow'`` selects the matching plotly sequential
        palette; any other value keeps plotly's default colors.

    Returns
    -------
    None
        The result of ``Figure.show()``; the figure is displayed as a side
        effect.
    """
    column = df[value]

    # value_counts replaces a Python loop over every (distinct value, row)
    # pair. Reindexing by unique() keeps slices in first-appearance order, and
    # fill_value=0 gives a missing-value label a count of 0, as the == based
    # counting did.
    counts = column.value_counts().reindex(column.unique(), fill_value=0)
    new_df = counts.rename_axis(None).to_frame('Total')

    palettes = {
        'plasma': px.colors.sequential.Plasma,
        'rainbow': px.colors.sequential.Rainbow,
    }

    pie_kwargs = dict(values='Total', names=new_df.index, title=title)
    # isinstance guard: only string keys can name a palette, and it avoids
    # hashing an arbitrary (possibly unhashable) color argument.
    palette = palettes.get(color) if isinstance(color, str) else None
    if palette is not None:
        pie_kwargs['color_discrete_sequence'] = palette

    fig = px.pie(new_df, **pie_kwargs)

    fig.update_layout(
        title={
            'y': 0.95,
            'x': 0.5
        },
    )

    return fig.show()
In [10]:
# Question: which vaccines are used, and in which countries?
plot_map(variable='vaccines',
         title='Vaccines Used in Different countries',
         color=None)
In [11]:
# Question: which vaccine (combination) appears most often in the data?
plot_pie(value='vaccines',
         title='Various vaccines and their uses',
         color='plasma')
In [12]:
plt.figure(figsize=(20, 6))

# Top 50 countries ranked by their maximum total_vaccinations_per_hundred.
data = (
    df.groupby('country')['total_vaccinations_per_hundred']
      .max()
      .sort_values(ascending=False)
      .head(50)
)

plt.bar(data.index, data)
# rotation must be a number (or 'vertical'/'horizontal'); the string '90' is
# rejected by newer matplotlib releases.
plt.xticks(rotation=90)
plt.yticks(np.arange(10.0, 160.0, step=20.0))
plt.title('Country vise Total Vaccination per Hundred', fontsize=24, fontweight='bold')
plt.ylabel('total vaccination per hundred')
plt.show()
In [13]:
# Which country has vaccinated the largest share of its population?
# In the bar chart above, Gibraltar has the highest value (per hundred),
# possibly because of its small population (33,701 in 2019).

# India only (ISO code 'IND'); .copy() so the date conversion below does not
# touch the shared frame.
df_India = df[df["iso_code"] == 'IND'].copy()
# Parse only India's own dates rather than the whole frame's date column.
df_India['date'] = pd.to_datetime(df_India['date'], format='%Y-%m-%d')

plt.figure(figsize=(20, 7))
sns.lineplot(data=df_India, x="date", y="people_vaccinated_per_hundred",
             marker='d', markersize=12, color='k')
# NOTE(review): this title says "daily vaccinations" but the plotted column is
# people_vaccinated_per_hundred — confirm the intended wording.
plt.title("India's daily vaccinations population trend")
plt.xticks(rotation=45)
plt.show()
In [14]:
# India: daily_vaccinations_per_million over time.
plt.figure(figsize=(20, 7))
sns.lineplot(data=df_India, x="date", y="daily_vaccinations_per_million", marker='o')
plt.title("India's daily vaccinations population trend")
# Call show(); a bare `plt.show` only echoes the function object as the
# cell's output.
plt.show()
Out[14]:
<function matplotlib.pyplot.show(close=None, block=None)>
In [15]:
# India: the people_vaccinated column plotted against date.
fig, ax = plt.subplots(figsize=(20, 7))
sns.lineplot(data=df_India, x="date", y="people_vaccinated", marker='o', ax=ax)
plt.show()
In [ ]:
# Draw the same three trend charts for every ISO code present in the data.
iso_code = df.iso_code.unique()

for row in iso_code:
    # Rows of the current country; .copy() so the date conversion stays local.
    df_county = df[df["iso_code"] == row].copy()
    # Parse only this country's dates. Converting the whole frame's date
    # column here would repeat the full parse once per country.
    df_county['date'] = pd.to_datetime(df_county['date'], format='%Y-%m-%d')

    # 1) people_vaccinated_per_hundred
    fig = plt.figure(figsize=(20, 7))
    sns.lineplot(data=df_county, x="date", y="people_vaccinated_per_hundred",
                 marker='d', markersize=12, color='k')
    plt.title('daily vaccinations population trend = %s' % (row))
    plt.xticks(rotation=45)
    plt.show()
    plt.close(fig)  # three figures per country: release each once shown

    # 2) daily_vaccinations_per_million
    fig = plt.figure(figsize=(20, 7))
    sns.lineplot(data=df_county, x="date", y="daily_vaccinations_per_million", marker='o')
    plt.title('daily vaccinations population trend = %s' % (row))
    plt.show()
    plt.close(fig)

    # 3) people_vaccinated
    fig = plt.figure(figsize=(20, 7))
    sns.lineplot(data=df_county, x="date", y="people_vaccinated", marker='o')
    plt.show()
    plt.close(fig)
In [ ]: